function nn = nnff(nn, x, y)
%NNFF performs a feedforward pass
% nn = nnff(nn, x, y) returns a neural network structure with updated
% layer activations, error and loss (nn.a, nn.e and nn.L)
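%
% Inputs: x is an m-by-(number of input units) matrix with one example per
% row; y is the corresponding matrix of target outputs (one row per example).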

    n = nn.n;
    m = size(x, 1);
    
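    % prepend a bias column of ones to the input; hidden layers get their
    % bias column at the end of the loop below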
    x = [ones(m,1) x];
    nn.a{1} = x;

    % feedforward pass
    for i = 2 : n-1
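        % pre-activation: a{i-1} * W{i-1}', where W{i-1} has one row per unit
        % of layer i and one column per unit of layer i-1 plus the bias column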
        switch nn.activation_function 
            case 'sigm'
                % Calculate the units' outputs (the bias column is appended after the switch)
                nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
            case 'opttanh'
                nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
            case 'tanh'
                nn.a{i} = tanh(nn.a{i - 1} * nn.W{i - 1}');
            case 'ReLU'
                nn.a{i} = max(0, nn.a{i - 1} * nn.W{i - 1}');
            case 'softplus'
                % numerically stable softplus: log(1 + exp(z)) = max(z,0) + log(1 + exp(-|z|))
                z = nn.a{i - 1} * nn.W{i - 1}';
                nn.a{i} = max(z, 0) + log(1 + exp(-abs(z)));
        end
        
        % dropout
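        % (standard, non-inverted dropout: units are zeroed with probability
        % dropoutFraction at train time; at test time all units are kept and
        % activations are scaled by the keep probability 1 - dropoutFraction)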
        if(nn.dropoutFraction > 0)
            if(nn.testing)
                nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction); % mean model, compensate
            else
                nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction);
                nn.a{i} = nn.a{i}.*nn.dropOutMask{i};
            end
        end
        
        % running exponential average of the mean activations, used by the sparsity penalty
        if(nn.nonSparsityPenalty>0)
            nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1);
        end
        
        % Add the bias term
        nn.a{i} = [ones(m,1) nn.a{i}];
    end
    
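    % output layer: 'sigm' for element-wise binary targets, 'softmax' for
    % 1-of-K classification (each row of a{n} sums to 1), 'linear' for regression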
    switch nn.output 
        case 'sigm'
            nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}');
        case 'linear'
            nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
        case 'softmax'
            nn.a{n} = softmax(nn.a{n - 1} * nn.W{n - 1}');
    end
    
    % error and loss
    nn.e = y - nn.a{n};
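    % per-batch losses, with m the number of examples:
    %   square loss:        L = 1/(2m) * sum_k ||y_k - a{n}_k||^2
    %   cross-entropy loss: L = -1/m  * sum_k sum_j y_kj * log(a{n}_kj)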
    switch nn.output
        case {'sigm', 'linear'}
            nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m; % square loss
        case 'softmax'
            nn.a{n}(nn.a{n} < 1e-6) = 1e-6; % clamp tiny probabilities to avoid log(0)
            nn.L = -sum(sum(y .* log(nn.a{n}))) / m; % cross-entropy loss
    end
    % in nntrain: L(n) = nn.L;
end
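
% Example usage (a sketch, not part of this file's interface; it assumes the
% toolbox's nnsetup function and uses random data):
%   nn = nnsetup([10 5 2]);              % 10 inputs, 5 hidden units, 2 outputs
%   nn.activation_function = 'sigm';     % pick one of the cases handled above
%   x  = rand(100, 10);
%   y  = double(rand(100, 2) > 0.5);
%   nn = nnff(nn, x, y);
%   disp(nn.L)                           % loss on this batch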
